import selenium  #This is equivalent to the R command library(selenium)
from selenium import webdriver #Importing the function we need
import re
from collections import defaultdict
from bs4 import BeautifulSoup
import pandas as pd
import time
import os
import json


# Run this file from the script directory.
# The working directory is moved to the path obtained by dropping a fixed
# number of trailing characters from the current directory, so that the
# relative 'Data/...' paths used below resolve from there.
# NOTE(review): presumably the script folder's name (without its separator)
# is exactly 7 characters long, which makes this the parent directory - confirm.
_TRAILING_CHARS_TO_DROP = 7

cwd = os.getcwd()
path = cwd[:-_TRAILING_CHARS_TO_DROP]
os.chdir(path)

# Load the docket/day/action table and the table of dockets already collected.
data = pd.read_excel('Data/Processed_Data/Docket_Day_Action.xlsx')
collected_cases = pd.read_excel('Data/Processed_Data/Supreme_Court_Dockets_2001_2024.xlsx')

# Keep only rows whose 'term' is strictly between 24 and 90.
# .copy() makes the filtered frame independent of the one read from disk, so
# the later assignment to data['cleaned_docket'] does not operate on a slice
# of another frame (which raises pandas' SettingWithCopyWarning).
data = data[(data['term'] > 24) & (data['term'] < 90)].copy()


# Normalise every docket identifier to a string that uses the plain ASCII
# hyphen. Spreadsheet text often carries typographic dashes that look the same
# but compare unequal, which would make already-collected dockets appear new.
# Covered: hyphen (U+2010), non-breaking hyphen (U+2011), figure dash (U+2012),
# en dash (U+2013), em dash (U+2014), horizontal bar (U+2015), minus (U+2212).
_DASH_TO_HYPHEN = str.maketrans(
    dict.fromkeys('\u2010\u2011\u2012\u2013\u2014\u2015\u2212', '-')
)

# str() is applied first so non-string cells are handled too; note that a
# float NaN cell therefore becomes the literal text 'nan'.
data['cleaned_docket'] = [
    str(entry).translate(_DASH_TO_HYPHEN) for entry in data['cleaned_docket']
]

# Dockets that appear in the filtered action table but not in the 'docket'
# column of the already-collected table.
new_dockets = set(data['cleaned_docket']).difference(collected_cases['docket'])


# Fetch the page for every new docket and keep its visible text.
# Result: `dockets` maps docket identifier (str) -> text of the fetched page.
link_start = 'https://www.supremecourt.gov/search.aspx?filename=/docket/docketfiles/html/public/'
driver = webdriver.Chrome()  # Launch the Chrome browser session
dockets = {}
try:
    for docket in new_dockets:
        # Convert before the duplicate check so the check uses the same key
        # form that is stored in `dockets`.
        docket = str(docket)
        if docket in dockets:
            continue

        time.sleep(1)  # Wait one second before each request

        if '-' in docket:
            # File name is the first two characters, a hyphen, then everything
            # from the fourth character on (the third character is replaced).
            # NOTE(review): presumably identifiers look like 'YY-NNNN' - confirm.
            link = link_start + docket[:2] + '-' + docket[3:] + '.html'
        else:
            link = link_start + docket + '.html'

        driver.get(link)
        # Parse the rendered page and store only its text content.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        dockets[docket] = soup.get_text()
finally:
    # Always close the browser, even if a page load or parse fails, so no
    # Chrome/driver process is left running.
    driver.quit()


# Write the docket -> page-text mapping to disk as indented JSON.
output_path = 'Data/Processed_Data/Lower_Court_Dockets/Supreme_Court_Dockets_Raw_Text_NEW.json'
with open(output_path, 'w') as out_file:
    json.dump(dockets, out_file, indent=4)